Resnet V0.¶

Import¶

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import timm

from sklearn.model_selection import KFold

# GPU 사용 가능 여부 확인
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'사용 디바이스: {device}')
if torch.cuda.is_available():
    print(f'GPU: {torch.cuda.get_device_name(0)}')
사용 디바이스: cuda
GPU: NVIDIA GeForce RTX 4060 Laptop GPU

Data Load¶

In [2]:
# 하이퍼파라미터
img_size = 256  
batch_size = 64
num_classes = 10
num_epochs = 50
# learning_rate = 0.0003 -> train section
num_workers = 0
version = "0.0"

# 데이터 경로
base_dir = r'c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj'
driver_csv_path = os.path.join(base_dir, 'data', 'driver_imgs_list.csv')
train_dir = os.path.join(base_dir, 'data', 'imgs', 'train')
test_dir = os.path.join(base_dir, 'data', 'imgs', 'test')
best_model_path = f'models/inception_v{version}.pth'

print(f"Train directory: {train_dir}")
print(f"Test directory: {test_dir}")
print(f"이미지 크기: {img_size}x{img_size}")
print(f"배치 크기: {batch_size}")
Train directory: c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\data\imgs\train
Test directory: c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\data\imgs\test
이미지 크기: 256x256
배치 크기: 64
In [3]:
driver_df = pd.read_csv(driver_csv_path)

print(f"고유 운전자 수: {driver_df['subject'].nunique()}명")
print(f"운전자 목록: {sorted(driver_df['subject'].unique())}")

driver_counts = driver_df['subject'].value_counts().sort_index()

plt.figure(figsize=(12, 6))
plt.bar(driver_counts.index.astype(str), driver_counts.values, color='C0', alpha=0.9)
plt.xlabel('Driver')
plt.ylabel('Image Count')
plt.title('Images per Driver')
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.show()
고유 운전자 수: 26명
운전자 목록: ['p002', 'p012', 'p014', 'p015', 'p016', 'p021', 'p022', 'p024', 'p026', 'p035', 'p039', 'p041', 'p042', 'p045', 'p047', 'p049', 'p050', 'p051', 'p052', 'p056', 'p061', 'p064', 'p066', 'p072', 'p075', 'p081']
No description has been provided for this image

5-Fold Cross Validation¶

fold별 운전자 목록을 미리 나누긴 하되 초기 실험 단계에서는 한 폴드(fold 2, 학습 데이터가 가장 많아서..)만 사용하고 이후 마무리 단계에서 전체 폴드 다 돌려서 일반화 성능 끌어올리는 방향으로 진행.

split train data into 5 folds¶

In [4]:
all_drivers = sorted(driver_df['subject'].unique())

n_folds = 5
kfold = KFold(n_splits=n_folds, shuffle=True, random_state=42)

fold_splits = []
for fold_idx, (train_indices, val_indices) in enumerate(kfold.split(all_drivers)):
	train_drivers = [all_drivers[i] for i in train_indices]
	val_drivers = [all_drivers[i] for i in val_indices]

	fold_splits.append({
		'fold': fold_idx+1,
		'train_drivers': train_drivers,
		'val_drivers': val_drivers
	})

	print("Fold", fold_idx+1)
	print("train:", train_drivers, "val:", val_drivers)
	
	train_imgs = driver_df[driver_df['subject'].isin(train_drivers)]
	val_imgs = driver_df[driver_df['subject'].isin(val_drivers)]
	print(f"학습 이미지: {len(train_imgs)}개")
	print(f"검증 이미지: {len(val_imgs)}개")
Fold 1
train: ['p012', 'p014', 'p015', 'p016', 'p021', 'p022', 'p024', 'p039', 'p042', 'p045', 'p047', 'p049', 'p051', 'p052', 'p056', 'p061', 'p064', 'p066', 'p072', 'p081'] val: ['p002', 'p026', 'p035', 'p041', 'p050', 'p075']
학습 이미지: 17446개
검증 이미지: 4978개
Fold 2
train: ['p002', 'p015', 'p016', 'p022', 'p024', 'p026', 'p035', 'p039', 'p041', 'p042', 'p047', 'p049', 'p050', 'p051', 'p052', 'p056', 'p061', 'p064', 'p066', 'p075', 'p081'] val: ['p012', 'p014', 'p021', 'p045', 'p072']
학습 이미지: 18418개
검증 이미지: 4006개
Fold 3
train: ['p002', 'p012', 'p014', 'p021', 'p022', 'p024', 'p026', 'p035', 'p039', 'p041', 'p045', 'p047', 'p050', 'p051', 'p052', 'p056', 'p061', 'p066', 'p072', 'p075', 'p081'] val: ['p015', 'p016', 'p042', 'p049', 'p064']
학습 이미지: 18049개
검증 이미지: 4375개
Fold 4
train: ['p002', 'p012', 'p014', 'p015', 'p016', 'p021', 'p022', 'p024', 'p026', 'p035', 'p039', 'p041', 'p042', 'p045', 'p047', 'p049', 'p050', 'p056', 'p064', 'p072', 'p075'] val: ['p051', 'p052', 'p061', 'p066', 'p081']
학습 이미지: 18098개
검증 이미지: 4326개
Fold 5
train: ['p002', 'p012', 'p014', 'p015', 'p016', 'p021', 'p026', 'p035', 'p041', 'p042', 'p045', 'p049', 'p050', 'p051', 'p052', 'p061', 'p064', 'p066', 'p072', 'p075', 'p081'] val: ['p022', 'p024', 'p039', 'p047', 'p056']
학습 이미지: 17685개
검증 이미지: 4739개

define DraiverDataset¶

In [5]:
class DriverDataset(Dataset):
    """운전자 행동 데이터셋"""
    
    def __init__(self, data_dir, driver_df, driver_list, transform=None, is_test=False):
        self.data_dir = data_dir
        self.transform = transform
        self.is_test = is_test
        self.images = []
        self.labels = []
        
        if is_test:
            test_images_dir = data_dir
            for img_name in os.listdir(test_images_dir):
                self.images.append(os.path.join(test_images_dir, img_name))
        else: #is_train
            driver_subset = driver_df[driver_df['subject'].isin(driver_list)]

            for _, row in driver_subset.iterrows():
                class_name = row['classname']
                img_name = row['img']
                img_path = os.path.join(data_dir, class_name, img_name)

                self.images.append(img_path)
                class_idx = int(class_name[1:])
                self.labels.append(class_idx)
        print(f"{'테스트' if is_test else '운전자' + str(len(driver_list))+'명'}, 데이터 {len(self.images)}개 이미지")
    
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        img_path = self.images[idx]
        image = Image.open(img_path).convert('RGB')
        
        if self.transform:
            image = self.transform(image)
        
        if self.is_test:
            return image, os.path.basename(img_path)
        else:
            label = self.labels[idx]
            return image, label

define online team transform¶

In [6]:
team_transform_train = transforms.Compose([
	transforms.Resize((img_size, img_size)),
	transforms.RandomRotation(degrees=10),
	transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),

	transforms.ToTensor(),
	transforms.RandomErasing(p=0.25),
	transforms.Normalize(mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])
])

team_transform_eval = transforms.Compose([
	transforms.Resize((img_size, img_size)),
	transforms.ToTensor(),
	transforms.Normalize(mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])
])

train¶

In [7]:
def train_epoch(model, train_loader, criterion, optimizer, device):
    """
    한 에폭(Epoch) 동안 모델을 학습시키고 손실과 정확도를 반환합니다.
    """
    model.train()  # 모델을 학습 모드로 설정
    
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    
    # 훈련 데이터셋의 모든 배치를 반복합니다.
    for inputs, labels in tqdm(train_loader, desc="Training"):
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        # 그래디언트 초기화
        optimizer.zero_grad()
        
        # 순전파 (Forward pass)
        outputs = model(inputs)
        loss = criterion(outputs, labels) # Log Loss (CrossEntropyLoss) 계산
        
        # 역전파 및 최적화 (Backward pass and optimization)
        loss.backward()
        optimizer.step()
        
        # 통계 업데이트
        running_loss += loss.item() * inputs.size(0)
        _, preds = torch.max(outputs, 1)
        correct_predictions += torch.sum(preds == labels.data)
        total_samples += inputs.size(0)
        
    epoch_loss = running_loss / total_samples
    epoch_acc = (correct_predictions.double() / total_samples) * 100
    
    return epoch_loss, epoch_acc.item()
In [8]:
def validate(model, val_loader, criterion, device):
    """
    검증 데이터셋을 이용해 모델의 성능(손실과 정확도)을 평가하고 반환합니다.
    """
    model.eval()  # 모델을 평가 모드로 설정
    
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    
    # 그래디언트 계산을 비활성화합니다.
    with torch.no_grad():
        for inputs, labels in tqdm(val_loader, desc="Validating"):
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            # 순전파
            outputs = model(inputs)
            loss = criterion(outputs, labels) # Log Loss (CrossEntropyLoss) 계산
            
            # 통계 업데이트
            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            correct_predictions += torch.sum(preds == labels.data)
            total_samples += inputs.size(0)

    epoch_loss = running_loss / total_samples
    epoch_acc = (correct_predictions.double() / total_samples) * 100
    
    # epoch_loss가 Log Loss에 해당하며, 이 값을 기준으로 최고 성능 모델을 저장합니다.
    return epoch_loss, epoch_acc.item()
In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import SequentialLR, LinearLR, CosineAnnealingLR
from torch.utils.data import DataLoader
import timm
import numpy as np
from sklearn.metrics import f1_score, log_loss
from tqdm import tqdm  # ⭐ 추가

def train_fold(fold_idx, train_drivers, val_drivers):
    """
    EXP-1: ResNet-50-D baseline (RGB only)
      - Optimizer: AdamW
      - Scheduler: Warmup(5ep) -> CosineAnnealing
      - Loss: CrossEntropy(label_smoothing=0.1)
      - Early Stop / Best Save: macro-F1
      - 추가 로깅: multiclass logloss
    """
    print(f"==== Fold {fold_idx}/{n_folds} ====")

    # ========== 데이터셋 & 로더 ==========
    train_dataset = DriverDataset(
        train_dir, driver_df, train_drivers,
        transform=team_transform_train, is_test=False
    )
    val_dataset = DriverDataset(
        train_dir, driver_df, val_drivers,
        transform=team_transform_eval, is_test=False
    )

    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True
    )
    val_loader = DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True
    )

    print(f"학습 배치 수: {len(train_loader)}")
    print(f"검증 배치 수: {len(val_loader)}")

    # ========== 모델 ==========
    model = timm.create_model(
        'resnet26d',
        pretrained=True,
        num_classes=num_classes
    ).to(device)

    for p in model.parameters():
        p.requires_grad = True

    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total_params = sum(p.numel() for p in model.parameters())
    print("\n" + "="*70)
    print("📊 모델 파라미터 통계 (EXP-1: ResNet-26-D)")
    print("="*70)
    print(f"  전체 파라미터:      {total_params:>15,}")
    print(f"  학습 가능 파라미터:  {trainable_params:>15,} ({100*trainable_params/total_params:>6.2f}%)")
    print("="*70 + "\n")

    # ========== 손실 & 옵티마이저 & 스케줄러 ==========
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=3e-4,
        weight_decay=5e-2
    )

    warmup_epochs = 5
    cosine_epochs = max(1, num_epochs - warmup_epochs)
    scheduler = SequentialLR(
        optimizer,
        schedulers=[
            LinearLR(optimizer, start_factor=0.01, end_factor=1.0, total_iters=warmup_epochs),
            CosineAnnealingLR(optimizer, T_max=cosine_epochs)
        ],
        milestones=[warmup_epochs]
    )

    # ========== 히스토리 ==========
    history = {
        'train_loss': [],
        'train_acc': [],
        'val_loss': [],
        'val_acc': [],
        'val_macro_f1': [],
        'val_logloss': [],
        'learning_rates': []
    }

    # ========== Early Stopping ==========
    early_stop_patience = 15
    patience_counter = 0
    best_metric = -float('inf')
    best_model_path = f'models/best_exp1_resnet50d_fold{fold_idx}.pth'

    print(f"\n⏱️ Early Stopping Patience (macro-F1 기준): {early_stop_patience} ep\n")
    print("="*70)
    print("🚀 학습 시작 (EXP-1)")
    print("="*70)

    def eval_on_loader(model, loader, criterion):
        """검증 루프: loss/acc + macro-F1 + multiclass logloss 계산"""
        model.eval()
        total, correct, running_loss = 0, 0, 0.0
        all_probs, all_labels = [], []

        # ⭐ tqdm 추가
        with torch.no_grad():
            pbar = tqdm(loader, desc='Validating', leave=False)
            for images, labels in pbar:
                images = images.to(device, non_blocking=True)
                labels = labels.to(device, non_blocking=True)

                logits = model(images)
                loss = criterion(logits, labels)

                running_loss += loss.item() * labels.size(0)
                preds = logits.argmax(dim=1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)

                probs = logits.softmax(dim=1).detach().cpu().numpy()
                all_probs.append(probs)
                all_labels.append(labels.detach().cpu().numpy())

                # ⭐ 실시간 업데이트
                current_acc = 100.0 * correct / total
                pbar.set_postfix({
                    'loss': f'{loss.item():.4f}',
                    'acc': f'{current_acc:.2f}%'
                })

        avg_loss = running_loss / max(1, total)
        acc = 100.0 * correct / max(1, total)

        all_probs = np.concatenate(all_probs, axis=0)
        all_labels = np.concatenate(all_labels, axis=0)

        macro_f1 = f1_score(all_labels, np.argmax(all_probs, axis=1), average='macro')
        mlogloss = log_loss(all_labels, all_probs, labels=list(range(num_classes)))

        return avg_loss, acc, macro_f1, mlogloss

    # ========== 에폭 루프 ==========
    for epoch in range(num_epochs):
        print(f'\n{"="*70}')
        print(f'Epoch {epoch+1}/{num_epochs}')
        print(f'{"="*70}')

        current_lr = optimizer.param_groups[0]['lr']
        history['learning_rates'].append(current_lr)

        # ======= Train (⭐ tqdm 추가) =======
        model.train()
        train_loss, train_correct, train_total = 0.0, 0, 0

        pbar = tqdm(train_loader, desc='Training', leave=False)
        for images, labels in pbar:
            images = images.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            optimizer.zero_grad(set_to_none=True)
            logits = model(images)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item() * labels.size(0)
            preds = logits.argmax(dim=1)
            train_correct += (preds == labels).sum().item()
            train_total += labels.size(0)

            # ⭐ 실시간 업데이트
            current_acc = 100.0 * train_correct / train_total
            pbar.set_postfix({
                'loss': f'{loss.item():.4f}',
                'acc': f'{current_acc:.2f}%',
                'lr': f'{current_lr:.6f}'
            })

        epoch_train_loss = train_loss / max(1, train_total)
        epoch_train_acc = 100.0 * train_correct / max(1, train_total)

        # ======= Validate =======
        val_loss, val_acc, val_macro_f1, val_logloss = eval_on_loader(model, val_loader, criterion)

        scheduler.step()

        history['train_loss'].append(epoch_train_loss)
        history['train_acc'].append(epoch_train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)
        history['val_macro_f1'].append(val_macro_f1)
        history['val_logloss'].append(val_logloss)

        print(f'\n📊 Epoch {epoch+1} 결과:')
        print(f'  Train Loss: {epoch_train_loss:.4f} | Train Acc: {epoch_train_acc:.2f}%')
        print(f'  Val   Loss: {val_loss:.4f} | Val Acc:   {val_acc:.2f}%')
        print(f'  Val Macro-F1: {val_macro_f1:.4f} | Val LogLoss: {val_logloss:.4f}')
        print(f'  LR: {current_lr:.6f}')

        # ======= Best 저장 & Early Stopping =======
        if val_macro_f1 > best_metric:
            best_metric = val_macro_f1
            patience_counter = 0

            torch.save({
                'fold': fold_idx,
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'history': history,
                'val_loss': val_loss,
                'val_acc': val_acc,
                'val_macro_f1': val_macro_f1,
                'val_logloss': val_logloss,
                'model_name': 'resnet50d',
                'freeze_mode': 'full_finetune',
                'trainable_params': trainable_params,
                'total_params': total_params,
            }, best_model_path)

            print(f'  ✅ 최고 성능(매크로F1) 모델 저장! (Macro-F1: {val_macro_f1:.4f})')

        else:
            patience_counter += 1
            print(f'  ⏳ Early Stopping 카운터: {patience_counter}/{early_stop_patience}')
            if patience_counter >= early_stop_patience:
                print(f'\n{"="*70}')
                print(f'🛑 Early Stopping 발동! (Epoch {epoch+1}) — macro-F1 개선 없음')
                print(f'   최고 Macro-F1: {best_metric:.4f}')
                print(f'   모델 저장 경로: {best_model_path}')
                print(f'{"="*70}')
                break

    final_epoch = epoch + 1
    print("\n" + "="*70)
    print(f"✅ Fold {fold_idx} 학습 완료! (EXP-1)")
    print("="*70)
    print(f"  총 학습 에폭: {final_epoch}/{num_epochs}")
    print(f"  최고 Macro-F1: {max(history['val_macro_f1']):.4f}")
    print(f"  최저 Val Loss: {min(history['val_loss']):.4f}")
    print(f"  최저 LogLoss:  {min(history['val_logloss']):.4f}")
    print(f"  최고 Val Acc:  {max(history['val_acc']):.2f}%")
    print(f"  모델 저장: {best_model_path}")
    print(f"  학습 파라미터: {trainable_params:,} / {total_params:,} ({100*trainable_params/total_params:.2f}%)")
    print("="*70)

    return {
        'fold': fold_idx,
        'history': history,
        'best_macro_f1': max(history['val_macro_f1']),
        'best_val_loss': min(history['val_loss']),
        'best_val_logloss': min(history['val_logloss']),
        'best_val_acc': max(history['val_acc']),
        'model_path': best_model_path,
        'stopped_epoch': final_epoch,
        'model_name': 'resnet50d',
        'freeze_mode': 'full_finetune',
        'trainable_params': trainable_params,
        'total_params': total_params
    }
In [12]:
# ========== Fold 2 학습 (Inception-C부터 Fine-tuning) ==========

all_fold_results = []

fold_info = fold_splits[1]  # Fold 2
fold_idx = fold_info['fold']
train_drivers = fold_info['train_drivers']
val_drivers = fold_info['val_drivers']


print("==== Resnet v0 ====")


# Inception-C부터 학습
fold_result = train_fold(
    fold_idx, 
    train_drivers, 
    val_drivers,
)

all_fold_results.append(fold_result)

# 메모리 정리
torch.cuda.empty_cache()

# 결과 출력
print(f"\n{'='*70}")
print("📊 최종 결과")
print(f"{'='*70}")
print(f"  Fold: {fold_result['fold']}")
print(f"  최저 Val Loss: {fold_result['best_val_loss']:.4f}")
print(f"  해당 Val Acc: {fold_result['best_val_acc']:.2f}%")
print(f"  최고 Val Acc: {fold_result['max_val_acc']:.2f}%")
print(f"  학습 완료 에폭: {fold_result['stopped_epoch']}")
print(f"  Freeze 모드: {fold_result['freeze_mode']}")
print(f"  학습 파라미터: {fold_result['trainable_params']:,} / {fold_result['total_params']:,}")
print(f"{'='*70}")
==== Resnet v0 ====
==== Fold 2/5 ====
운전자21명, 데이터 18418개 이미지
운전자5명, 데이터 4006개 이미지
학습 배치 수: 288
검증 배치 수: 63

======================================================================
📊 모델 파라미터 통계 (EXP-1: ResNet-26-D)
======================================================================
  전체 파라미터:           13,985,898
  학습 가능 파라미터:       13,985,898 (100.00%)
======================================================================


⏱️ Early Stopping Patience (macro-F1 기준): 15 ep

======================================================================
🚀 학습 시작 (EXP-1)
======================================================================

======================================================================
Epoch 1/50
======================================================================
                                                                                                 
📊 Epoch 1 결과:
  Train Loss: 2.2178 | Train Acc: 26.28%
  Val   Loss: 2.1787 | Val Acc:   28.53%
  Val Macro-F1: 0.2455 | Val LogLoss: 2.1622
  LR: 0.000003
  ✅ 최고 성능(매크로F1) 모델 저장! (Macro-F1: 0.2455)

======================================================================
Epoch 2/50
======================================================================
                                                                                                 
📊 Epoch 2 결과:
  Train Loss: 0.7937 | Train Acc: 91.48%
  Val   Loss: 0.8684 | Val Acc:   86.52%
  Val Macro-F1: 0.8630 | Val LogLoss: 0.5725
  LR: 0.000062
  ✅ 최고 성능(매크로F1) 모델 저장! (Macro-F1: 0.8630)

======================================================================
Epoch 3/50
======================================================================
                                                                                                 
📊 Epoch 3 결과:
  Train Loss: 0.5743 | Train Acc: 97.96%
  Val   Loss: 0.8373 | Val Acc:   86.77%
  Val Macro-F1: 0.8622 | Val LogLoss: 0.5223
  LR: 0.000122
  ⏳ Early Stopping 카운터: 1/15

======================================================================
Epoch 4/50
======================================================================
                                                                                                 
📊 Epoch 4 결과:
  Train Loss: 0.5540 | Train Acc: 98.65%
  Val   Loss: 0.8325 | Val Acc:   86.84%
  Val Macro-F1: 0.8590 | Val LogLoss: 0.5249
  LR: 0.000181
  ⏳ Early Stopping 카운터: 2/15

======================================================================
Epoch 5/50
======================================================================
                                                                                                 
📊 Epoch 5 결과:
  Train Loss: 0.5501 | Train Acc: 98.81%
  Val   Loss: 0.7860 | Val Acc:   89.67%
  Val Macro-F1: 0.8868 | Val LogLoss: 0.4598
  LR: 0.000241
  ✅ 최고 성능(매크로F1) 모델 저장! (Macro-F1: 0.8868)

======================================================================
Epoch 6/50
======================================================================
                                                                                                 
📊 Epoch 6 결과:
  Train Loss: 0.5421 | Train Acc: 99.04%
  Val   Loss: 0.7929 | Val Acc:   88.94%
  Val Macro-F1: 0.8845 | Val LogLoss: 0.4727
  LR: 0.000300
  ⏳ Early Stopping 카운터: 1/15

======================================================================
Epoch 7/50
======================================================================
                                                                                                 
📊 Epoch 7 결과:
  Train Loss: 0.5355 | Train Acc: 99.23%
  Val   Loss: 0.8588 | Val Acc:   85.65%
  Val Macro-F1: 0.8405 | Val LogLoss: 0.5455
  LR: 0.000300
  ⏳ Early Stopping 카운터: 2/15

======================================================================
Epoch 8/50
======================================================================
                                                                                                 
📊 Epoch 8 결과:
  Train Loss: 0.5291 | Train Acc: 99.34%
  Val   Loss: 0.8047 | Val Acc:   88.17%
  Val Macro-F1: 0.8753 | Val LogLoss: 0.4823
  LR: 0.000299
  ⏳ Early Stopping 카운터: 3/15

======================================================================
Epoch 9/50
======================================================================
                                                                                                 
📊 Epoch 9 결과:
  Train Loss: 0.5254 | Train Acc: 99.51%
  Val   Loss: 0.8158 | Val Acc:   88.64%
  Val Macro-F1: 0.8726 | Val LogLoss: 0.4962
  LR: 0.000297
  ⏳ Early Stopping 카운터: 4/15

======================================================================
Epoch 10/50
======================================================================
                                                                                                 
📊 Epoch 10 결과:
  Train Loss: 0.5232 | Train Acc: 99.51%
  Val   Loss: 0.7592 | Val Acc:   90.09%
  Val Macro-F1: 0.8944 | Val LogLoss: 0.4386
  LR: 0.000294
  ✅ 최고 성능(매크로F1) 모델 저장! (Macro-F1: 0.8944)

======================================================================
Epoch 11/50
======================================================================
                                                                                                 
📊 Epoch 11 결과:
  Train Loss: 0.5227 | Train Acc: 99.53%
  Val   Loss: 0.8097 | Val Acc:   87.84%
  Val Macro-F1: 0.8716 | Val LogLoss: 0.4940
  LR: 0.000291
  ⏳ Early Stopping 카운터: 1/15

======================================================================
Epoch 12/50
======================================================================
                                                                                                 
📊 Epoch 12 결과:
  Train Loss: 0.5253 | Train Acc: 99.45%
  Val   Loss: 0.8132 | Val Acc:   88.64%
  Val Macro-F1: 0.8722 | Val LogLoss: 0.4948
  LR: 0.000287
  ⏳ Early Stopping 카운터: 2/15

======================================================================
Epoch 13/50
======================================================================
                                                                                                 
📊 Epoch 13 결과:
  Train Loss: 0.5233 | Train Acc: 99.47%
  Val   Loss: 0.8518 | Val Acc:   85.10%
  Val Macro-F1: 0.8468 | Val LogLoss: 0.5540
  LR: 0.000282
  ⏳ Early Stopping 카운터: 3/15

======================================================================
Epoch 14/50
======================================================================
                                                                                                 
📊 Epoch 14 결과:
  Train Loss: 0.5193 | Train Acc: 99.61%
  Val   Loss: 0.8297 | Val Acc:   86.35%
  Val Macro-F1: 0.8540 | Val LogLoss: 0.5202
  LR: 0.000277
  ⏳ Early Stopping 카운터: 4/15

======================================================================
Epoch 15/50
======================================================================
                                                                                                 
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[12], line 15
     11 print("==== Resnet v0 ====")
     14 # Inception-C부터 학습
---> 15 fold_result = train_fold(
     16     fold_idx, 
     17     train_drivers, 
     18     val_drivers,
     19 )
     21 all_fold_results.append(fold_result)
     23 # 메모리 정리

Cell In[11], line 167, in train_fold(fold_idx, train_drivers, val_drivers)
    164 train_loss, train_correct, train_total = 0.0, 0, 0
    166 pbar = tqdm(train_loader, desc='Training', leave=False)
--> 167 for images, labels in pbar:
    168     images = images.to(device, non_blocking=True)
    169     labels = labels.to(device, non_blocking=True)

File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\tqdm\std.py:1181, in tqdm.__iter__(self)
   1178 time = self._time
   1180 try:
-> 1181     for obj in iterable:
   1182         yield obj
   1183         # Update and possibly print the progressbar.
   1184         # Note: does not call self.update(1) for speed optimisation.

File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\torch\utils\data\dataloader.py:732, in _BaseDataLoaderIter.__next__(self)
    729 if self._sampler_iter is None:
    730     # TODO(https://github.com/pytorch/pytorch/issues/76750)
    731     self._reset()  # type: ignore[call-arg]
--> 732 data = self._next_data()
    733 self._num_yielded += 1
    734 if (
    735     self._dataset_kind == _DatasetKind.Iterable
    736     and self._IterableDataset_len_called is not None
    737     and self._num_yielded > self._IterableDataset_len_called
    738 ):

File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\torch\utils\data\dataloader.py:788, in _SingleProcessDataLoaderIter._next_data(self)
    786 def _next_data(self):
    787     index = self._next_index()  # may raise StopIteration
--> 788     data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
    789     if self._pin_memory:
    790         data = _utils.pin_memory.pin_memory(data, self._pin_memory_device)

File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\torch\utils\data\_utils\fetch.py:52, in _MapDatasetFetcher.fetch(self, possibly_batched_index)
     50         data = self.dataset.__getitems__(possibly_batched_index)
     51     else:
---> 52         data = [self.dataset[idx] for idx in possibly_batched_index]
     53 else:
     54     data = self.dataset[possibly_batched_index]

Cell In[5], line 36, in DriverDataset.__getitem__(self, idx)
     33 image = Image.open(img_path).convert('RGB')
     35 if self.transform:
---> 36     image = self.transform(image)
     38 if self.is_test:
     39     return image, os.path.basename(img_path)

File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\torchvision\transforms\transforms.py:95, in Compose.__call__(self, img)
     93 def __call__(self, img):
     94     for t in self.transforms:
---> 95         img = t(img)
     96     return img

File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\torch\nn\modules\module.py:1775, in Module._wrapped_call_impl(self, *args, **kwargs)
   1773     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1774 else:
-> 1775     return self._call_impl(*args, **kwargs)

File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\torch\nn\modules\module.py:1786, in Module._call_impl(self, *args, **kwargs)
   1781 # If we don't have any hooks, we want to skip the rest of the logic in
   1782 # this function, and just call forward.
   1783 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1784         or _global_backward_pre_hooks or _global_backward_hooks
   1785         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1786     return forward_call(*args, **kwargs)
   1788 result = None
   1789 called_always_called_hooks = set()

File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\torchvision\transforms\transforms.py:1278, in ColorJitter.forward(self, img)
   1276     img = F.adjust_contrast(img, contrast_factor)
   1277 elif fn_id == 2 and saturation_factor is not None:
-> 1278     img = F.adjust_saturation(img, saturation_factor)
   1279 elif fn_id == 3 and hue_factor is not None:
   1280     img = F.adjust_hue(img, hue_factor)

File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\torchvision\transforms\functional.py:912, in adjust_saturation(img, saturation_factor)
    907         return F_pil.adjust_contrast(img, contrast_factor)
    909     return F_t.adjust_contrast(img, contrast_factor)
--> 912 def adjust_saturation(img: Tensor, saturation_factor: float) -> Tensor:
    913     """Adjust color saturation of an image.
    914 
    915     Args:
   (...)    924         PIL Image or Tensor: Saturation adjusted image.
    925     """
    926     if not torch.jit.is_scripting() and not torch.jit.is_tracing():

KeyboardInterrupt: 
In [13]:
# ========== 학습 곡선 시각화 (4개 그래프) ==========

fig, axes = plt.subplots(2, 2, figsize=(16, 12))

for result in all_fold_results:
    fold_idx = result['fold']
    history = result['history']
    stopped_epoch = result['stopped_epoch']
    best_val_loss = result['best_val_loss']
    
    # 1. Loss 그래프
    ax1 = axes[0, 0]
    epochs = range(1, len(history['train_loss']) + 1)
    ax1.plot(epochs, history['train_loss'], label='Train Loss', 
             marker='o', linewidth=2, alpha=0.8, color='#1f77b4')
    ax1.plot(epochs, history['val_loss'], label='Val Loss', 
             marker='s', linewidth=2, alpha=0.8, color='#ff7f0e')
    
    # 최저 Val Loss 지점 표시
    best_epoch = np.argmin(history['val_loss']) + 1
    ax1.scatter(best_epoch, best_val_loss, color='red', s=250, zorder=5, 
                marker='*', edgecolors='black', linewidths=2,
                label=f'Best (Epoch {best_epoch})')
    
    # Early Stopping 지점 표시
    if stopped_epoch < num_epochs:
        ax1.axvline(stopped_epoch, color='red', linestyle='--', 
                   linewidth=2, alpha=0.5, label=f'Early Stop (E{stopped_epoch})')
    
    ax1.set_title(f'Fold {fold_idx} - Loss (Multiclass Log Loss)', 
                 fontsize=14, fontweight='bold')
    ax1.set_xlabel('Epoch', fontsize=12)
    ax1.set_ylabel('Loss', fontsize=12)
    ax1.legend(loc='best', fontsize=10)
    ax1.grid(True, alpha=0.3)
    
    # 2. Accuracy 그래프
    ax2 = axes[0, 1]
    ax2.plot(epochs, history['train_acc'], label='Train Acc', 
             marker='o', linewidth=2, alpha=0.8, color='#2ca02c')
    ax2.plot(epochs, history['val_acc'], label='Val Acc', 
             marker='s', linewidth=2, alpha=0.8, color='#d62728')
    
    # 최고 Val Acc 지점 표시
    best_acc_epoch = np.argmax(history['val_acc']) + 1
    best_val_acc = max(history['val_acc'])
    ax2.scatter(best_acc_epoch, best_val_acc, color='green', s=250, zorder=5,
                marker='*', edgecolors='black', linewidths=2,
                label=f'Best (Epoch {best_acc_epoch})')
    
    if stopped_epoch < num_epochs:
        ax2.axvline(stopped_epoch, color='red', linestyle='--', 
                   linewidth=2, alpha=0.5, label=f'Early Stop (E{stopped_epoch})')
    
    ax2.set_title(f'Fold {fold_idx} - Accuracy', fontsize=14, fontweight='bold')
    ax2.set_xlabel('Epoch', fontsize=12)
    ax2.set_ylabel('Accuracy (%)', fontsize=12)
    ax2.legend(loc='best', fontsize=10)
    ax2.grid(True, alpha=0.3)
    
    # 3. Learning Rate 그래프
    ax3 = axes[1, 0]
    ax3.plot(epochs, history['learning_rates'], marker='o', linewidth=2, 
             color='purple', alpha=0.8, label='Learning Rate')
    ax3.set_title(f'Fold {fold_idx} - Learning Rate Schedule', 
                 fontsize=14, fontweight='bold')
    ax3.set_xlabel('Epoch', fontsize=12)
    ax3.set_ylabel('Learning Rate', fontsize=12)
    ax3.set_yscale('log')
    ax3.legend(loc='best', fontsize=10)
    ax3.grid(True, alpha=0.3, which='both')
    
    if stopped_epoch < num_epochs:
        ax3.axvline(stopped_epoch, color='red', linestyle='--', 
                   linewidth=2, alpha=0.5)
    
    # 4. Train vs Val 비교 (Loss & Acc)
    ax4 = axes[1, 1]
    
    # Loss 차이
    loss_diff = np.array(history['train_loss']) - np.array(history['val_loss'])
    ax4_twin = ax4.twinx()
    
    ax4.plot(epochs, loss_diff, marker='o', linewidth=2, 
            color='#e377c2', alpha=0.7, label='Loss Diff (Train - Val)')
    ax4.axhline(0, color='gray', linestyle='--', linewidth=1)
    ax4.set_xlabel('Epoch', fontsize=12)
    ax4.set_ylabel('Loss Difference', fontsize=12, color='#e377c2')
    ax4.tick_params(axis='y', labelcolor='#e377c2')
    
    # Accuracy 차이
    acc_diff = np.array(history['train_acc']) - np.array(history['val_acc'])
    ax4_twin.plot(epochs, acc_diff, marker='s', linewidth=2,
                 color='#bcbd22', alpha=0.7, label='Acc Diff (Train - Val)')
    ax4_twin.set_ylabel('Accuracy Difference (%)', fontsize=12, color='#bcbd22')
    ax4_twin.tick_params(axis='y', labelcolor='#bcbd22')
    
    ax4.set_title(f'Fold {fold_idx} - Overfitting 모니터링', 
                 fontsize=14, fontweight='bold')
    ax4.grid(True, alpha=0.3)
    
    # Legend 통합
    lines1, labels1 = ax4.get_legend_handles_labels()
    lines2, labels2 = ax4_twin.get_legend_handles_labels()
    ax4.legend(lines1 + lines2, labels1 + labels2, loc='best', fontsize=9)
    
    if stopped_epoch < num_epochs:
        ax4.axvline(stopped_epoch, color='red', linestyle='--', 
                   linewidth=2, alpha=0.5)

plt.tight_layout()
plt.savefig(f'./plots/loss_curve/inception_v{version}_fold{fold_idx}_detailed.png', 
           dpi=300, bbox_inches='tight')
plt.show()

# ========== 통계 출력 ==========
print("\n" + "="*70)
print("📈 학습 통계 상세")
print("="*70)
print(f"초기 Train Loss: {history['train_loss'][0]:.4f}")
print(f"최종 Train Loss: {history['train_loss'][-1]:.4f}")
print(f"최저 Train Loss: {min(history['train_loss']):.4f} (Epoch {np.argmin(history['train_loss'])+1})")
print(f"\n초기 Val Loss: {history['val_loss'][0]:.4f}")
print(f"최종 Val Loss: {history['val_loss'][-1]:.4f}")
print(f"최저 Val Loss: {min(history['val_loss']):.4f} (Epoch {np.argmin(history['val_loss'])+1})")
print(f"\n초기 Train Acc: {history['train_acc'][0]:.2f}%")
print(f"최종 Train Acc: {history['train_acc'][-1]:.2f}%")
print(f"최고 Train Acc: {max(history['train_acc']):.2f}% (Epoch {np.argmax(history['train_acc'])+1})")
print(f"\n초기 Val Acc: {history['val_acc'][0]:.2f}%")
print(f"최종 Val Acc: {history['val_acc'][-1]:.2f}%")
print(f"최고 Val Acc: {max(history['val_acc']):.2f}% (Epoch {np.argmax(history['val_acc'])+1})")
print(f"\n초기 LR: {history['learning_rates'][0]:.6f}")
print(f"최종 LR: {history['learning_rates'][-1]:.6f}")
print(f"LR 변경 횟수: {len(set(history['learning_rates'])) - 1}회")
print("="*70)

# ========== Overfitting 분석 ==========
final_loss_gap = history['train_loss'][-1] - history['val_loss'][-1]
final_acc_gap = history['train_acc'][-1] - history['val_acc'][-1]

print("\n" + "="*70)
print("🔍 Overfitting 분석")
print("="*70)
print(f"최종 Loss 차이 (Train - Val): {final_loss_gap:+.4f}")
print(f"최종 Acc 차이 (Train - Val): {final_acc_gap:+.2f}%")

if final_acc_gap > 10:
    print("⚠️ 경고: 심각한 Overfitting 감지! (Acc 차이 > 10%)")
elif final_acc_gap > 5:
    print("⚠️ 주의: 약간의 Overfitting 감지 (Acc 차이 > 5%)")
else:
    print("✅ 양호: Overfitting이 잘 제어되고 있습니다.")

print("="*70)
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
Cell In[13], line 112
    108         ax4.axvline(stopped_epoch, color='red', linestyle='--', 
    109                    linewidth=2, alpha=0.5)
    111 plt.tight_layout()
--> 112 plt.savefig(f'./plots/loss_curve/inception_v{version}_fold{fold_idx}_detailed.png', 
    113            dpi=300, bbox_inches='tight')
    114 plt.show()
    116 # ========== 통계 출력 ==========

File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\matplotlib\pyplot.py:1250, in savefig(*args, **kwargs)
   1247 fig = gcf()
   1248 # savefig default implementation has no return, so mypy is unhappy
   1249 # presumably this is here because subclasses can return?
-> 1250 res = fig.savefig(*args, **kwargs)  # type: ignore[func-returns-value]
   1251 fig.canvas.draw_idle()  # Need this if 'transparent=True', to reset colors.
   1252 return res

File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\matplotlib\figure.py:3490, in Figure.savefig(self, fname, transparent, **kwargs)
   3488     for ax in self.axes:
   3489         _recursively_make_axes_transparent(stack, ax)
-> 3490 self.canvas.print_figure(fname, **kwargs)

File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\matplotlib\backend_bases.py:2186, in FigureCanvasBase.print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, bbox_inches, pad_inches, bbox_extra_artists, backend, **kwargs)
   2182 try:
   2183     # _get_renderer may change the figure dpi (as vector formats
   2184     # force the figure dpi to 72), so we need to set it again here.
   2185     with cbook._setattr_cm(self.figure, dpi=dpi):
-> 2186         result = print_method(
   2187             filename,
   2188             facecolor=facecolor,
   2189             edgecolor=edgecolor,
   2190             orientation=orientation,
   2191             bbox_inches_restore=_bbox_inches_restore,
   2192             **kwargs)
   2193 finally:
   2194     if bbox_inches and restore_bbox:

File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\matplotlib\backend_bases.py:2042, in FigureCanvasBase._switch_canvas_and_return_print_method.<locals>.<lambda>(*args, **kwargs)
   2038     optional_kws = {  # Passed by print_figure for other renderers.
   2039         "dpi", "facecolor", "edgecolor", "orientation",
   2040         "bbox_inches_restore"}
   2041     skip = optional_kws - {*inspect.signature(meth).parameters}
-> 2042     print_method = functools.wraps(meth)(lambda *args, **kwargs: meth(
   2043         *args, **{k: v for k, v in kwargs.items() if k not in skip}))
   2044 else:  # Let third-parties do as they see fit.
   2045     print_method = meth

File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\matplotlib\backends\backend_agg.py:481, in FigureCanvasAgg.print_png(self, filename_or_obj, metadata, pil_kwargs)
    434 def print_png(self, filename_or_obj, *, metadata=None, pil_kwargs=None):
    435     """
    436     Write the figure to a PNG file.
    437 
   (...)    479         *metadata*, including the default 'Software' key.
    480     """
--> 481     self._print_pil(filename_or_obj, "png", pil_kwargs, metadata)

File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\matplotlib\backends\backend_agg.py:430, in FigureCanvasAgg._print_pil(self, filename_or_obj, fmt, pil_kwargs, metadata)
    425 """
    426 Draw the canvas, then save it using `.image.imsave` (to which
    427 *pil_kwargs* and *metadata* are forwarded).
    428 """
    429 FigureCanvasAgg.draw(self)
--> 430 mpl.image.imsave(
    431     filename_or_obj, self.buffer_rgba(), format=fmt, origin="upper",
    432     dpi=self.figure.dpi, metadata=metadata, pil_kwargs=pil_kwargs)

File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\matplotlib\image.py:1657, in imsave(fname, arr, vmin, vmax, cmap, format, origin, dpi, metadata, pil_kwargs)
   1655 pil_kwargs.setdefault("format", format)
   1656 pil_kwargs.setdefault("dpi", (dpi, dpi))
-> 1657 image.save(fname, **pil_kwargs)

File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\PIL\Image.py:2583, in Image.save(self, fp, format, **params)
   2581         fp = builtins.open(filename, "r+b")
   2582     else:
-> 2583         fp = builtins.open(filename, "w+b")
   2584 else:
   2585     fp = cast(IO[bytes], fp)

FileNotFoundError: [Errno 2] No such file or directory: './plots/loss_curve/inception_v0.0_fold2_detailed.png'
No description has been provided for this image

Submission¶

In [23]:
# 학습 결과에서 Fold 2의 정보를 추출합니다.
# all_fold_results에는 현재 단 하나의 결과(Fold 2)만 들어있어야 합니다.
best_model_path = "./models/best_exp1_resnet50d_fold2.pth"
model = timm.create_model(
        'resnet26d',
        pretrained=True,
        num_classes=num_classes
    ).to(device)

if not all_fold_results:
    print("🚨 오류: 학습된 폴드 결과가 없습니다. train_fold 함수를 먼저 실행하세요.")
else:
    result = all_fold_results[0]
    fold_idx = result['fold']
    model_path = result['model_path']
    
    print("\n" + "=" * 70)
    print(f"🔮 Fold {fold_idx} 단일 모델 예측 시작")
    print("=" * 70)

    # 테스트 데이터셋 생성
    # test_dir은 'data/imgs/test'여야 합니다. 
    
    test_dataset = DriverDataset(
        test_dir, driver_df, [], 
        transform=team_transform_eval, is_test=True
    )

    test_loader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers
    )

    print(f"테스트 샘플: {len(test_dataset)}개")

    # --- 모델 로드 및 예측 ---

    
    # 2. 저장된 최저 Loss 가중치 로드
    print(f"\n📁 모델 로드 중: {model_path}...")
    try:
        checkpoint = torch.load(model_path, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
        print(f"✓ Fold {fold_idx} 모델 (Epoch {checkpoint['epoch']+1}, Val Loss: {checkpoint.get('val_loss', 'N/A'):.4f}) 로드 완료.")
    except Exception as e:
        print(f"🚨 모델 로드 실패: {e}")
        # 이 시점에서 코드 실행을 멈추거나, 오류 처리를 진행할 수 있습니다.
        raise
        
    model = model.to(device)
    model.eval() # 평가 모드 설정
    
    # 3. 예측 실행
    predictions = []
    img_names = []
    
    with torch.no_grad():
        for images, filenames in tqdm(test_loader, desc=f'Fold {fold_idx} 예측'):
            images = images.to(device)
            outputs = model(images)
            
            # 4. 확률 (Softmax) 계산
            probs = torch.softmax(outputs, dim=1)
            predictions.append(probs.cpu().numpy())
            
            img_names.extend(filenames)
    
    final_predictions = np.vstack(predictions)
    print(f"\n✓ 예측 완료: {final_predictions.shape}")

    # --- Submission 파일 생성 ---
    
    # 클래스 이름을 c0, c1, ... c9로 설정
    class_cols = [f'c{i}' for i in range(num_classes)]

    submission_data = {'img': img_names}
    for i, col in enumerate(class_cols):
        submission_data[col] = final_predictions[:, i]
    
    submission = pd.DataFrame(submission_data)

    submission_file = f'inception_v{version}.csv'
    submission.to_csv("./submissions/"+submission_file, index=False)

    print("\n" + "=" * 70)
    print(f"✅ Submission 파일 생성 완료: {submission_file}")
    print(f"✅ 총 {len(submission)}개 이미지 예측")
    print("=" * 70)
    
    print("\n📋 Submission 샘플:")
    print(submission.head())
🚨 오류: 학습된 폴드 결과가 없습니다. train_fold 함수를 먼저 실행하세요.

Analysis¶

In [ ]:
# 셀 추가: Confusion Matrix 분석
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

def analyze_predictions(model, val_loader, device):
    """검증 데이터로 상세 분석"""
    model.eval()
    all_preds = []
    all_labels = []
    all_probs = []
    
    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc='예측 중'):
            images = images.to(device)
            outputs = model(images)
            probs = torch.softmax(outputs, dim=1)
            _, preds = torch.max(outputs, 1)
            
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            all_probs.append(probs.cpu().numpy())
    
    all_probs = np.vstack(all_probs)
    return np.array(all_labels), np.array(all_preds), all_probs

# 모델 로드
model = timm.create_model(
        'resnet26d',
        pretrained=True,
        num_classes=num_classes
    ).to(device)
checkpoint = torch.load(best_model_path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model = model.to(device)
try:
    os.mkdir(f'./plots/resnet_v{version}')
except:
    pass

# 예측
fold_info = fold_splits[1] 

fold_idx = fold_info['fold']
train_drivers = fold_info['train_drivers']
val_drivers = fold_info['val_drivers']

val_dataset = DriverDataset(
    train_dir, driver_df, val_drivers,
    transform=team_transform_eval, is_test=False
)
val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
)
y_true, y_pred, y_probs = analyze_predictions(model, val_loader, device)

# 1. Confusion Matrix
class_names = [f'c{i}' for i in range(10)]
cm = confusion_matrix(y_true, y_pred)

plt.figure(figsize=(12, 10))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
            xticklabels=class_names, yticklabels=class_names,
            cbar_kws={'label': 'Count'})
plt.title('Confusion Matrix', fontsize=16, fontweight='bold')
plt.ylabel('True Label', fontsize=12)
plt.xlabel('Predicted Label', fontsize=12)
plt.tight_layout()
plt.savefig(f'plots/resnet_v{version}/confusion_matrix.png', dpi=300)
plt.show()

# 2. Normalized Confusion Matrix (비율)
cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

plt.figure(figsize=(12, 10))
sns.heatmap(cm_normalized, annot=True, fmt='.2%', cmap='YlOrRd',
            xticklabels=class_names, yticklabels=class_names)
plt.title('Normalized Confusion Matrix (%)', fontsize=16, fontweight='bold')
plt.ylabel('True Label', fontsize=12)
plt.xlabel('Predicted Label', fontsize=12)
plt.tight_layout()
plt.savefig(f'plots/resnet_v{version}/confusion_matrix_normalized.png', dpi=300)
plt.show()

# 3. Classification Report
print("\n" + "="*70)
print("📊 Classification Report")
print("="*70)
print(classification_report(y_true, y_pred, target_names=class_names, digits=4))
운전자5명, 데이터 4006개 이미지
예측 중: 100%|██████████| 63/63 [00:22<00:00,  2.74it/s]
No description has been provided for this image
No description has been provided for this image
======================================================================
📊 Classification Report
======================================================================
              precision    recall  f1-score   support

          c0     0.8033    0.7505    0.7760       457
          c1     0.9976    0.8884    0.9398       466
          c2     0.9977    0.9884    0.9930       430
          c3     1.0000    0.9905    0.9952       423
          c4     0.9184    0.9955    0.9554       441
          c5     0.9524    0.9756    0.9639       410
          c6     0.8785    0.9216    0.8995       408
          c7     0.8966    0.9873    0.9398       316
          c8     0.8049    0.6923    0.7444       286
          c9     0.7093    0.7669    0.7370       369

    accuracy                         0.9009      4006
   macro avg     0.8959    0.8957    0.8944      4006
weighted avg     0.9019    0.9009    0.9001      4006

In [26]:
# 클래스별 성능 분석
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

if 'y_true' not in globals() or 'y_pred' not in globals() or 'y_probs' not in globals():
    print("y_true/y_pred/y_probs가 없음 — analyze_predictions 실행 중...")
    y_true, y_pred, y_probs = analyze_predictions(model, val_loader, device)
else:
    print("y_true/y_pred/y_probs 이미 존재, 재계산 생략")

precision, recall, f1, support = precision_recall_fscore_support(
    y_true, y_pred, average=None
)

# 데이터프레임으로 정리
class_performance = pd.DataFrame({
    'Class': class_names,
    'Precision': precision,
    'Recall': recall,
    'F1-Score': f1,
    'Support': support,
    'Accuracy': [accuracy_score(y_true[y_true==i], y_pred[y_true==i]) 
                 if np.sum(y_true==i) > 0 else 0 for i in range(10)]
})

print("\n" + "="*70)
print("📈 클래스별 성능")
print("="*70)
print(class_performance.to_string(index=False))

# 시각화
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# Precision
axes[0, 0].bar(class_names, precision, color='skyblue', alpha=0.8)
axes[0, 0].set_title('Precision by Class', fontweight='bold')
axes[0, 0].set_ylabel('Precision')
axes[0, 0].set_ylim([0, 1.1])
axes[0, 0].grid(axis='y', alpha=0.3)

# Recall
axes[0, 1].bar(class_names, recall, color='lightcoral', alpha=0.8)
axes[0, 1].set_title('Recall by Class', fontweight='bold')
axes[0, 1].set_ylabel('Recall')
axes[0, 1].set_ylim([0, 1.1])
axes[0, 1].grid(axis='y', alpha=0.3)

# F1-Score
axes[1, 0].bar(class_names, f1, color='lightgreen', alpha=0.8)
axes[1, 0].set_title('F1-Score by Class', fontweight='bold')
axes[1, 0].set_ylabel('F1-Score')
axes[1, 0].set_ylim([0, 1.1])
axes[1, 0].grid(axis='y', alpha=0.3)

# Support
axes[1, 1].bar(class_names, support, color='plum', alpha=0.8)
axes[1, 1].set_title('Support (Sample Count) by Class', fontweight='bold')
axes[1, 1].set_ylabel('Count')
axes[1, 1].grid(axis='y', alpha=0.3)

plt.tight_layout()
os.mkdir(f'plots/resnet_v{version}')
plt.savefig(f'plots/resnet_v{version}/class_performance.png', dpi=300)
plt.show()
y_true/y_pred/y_probs 이미 존재, 재계산 생략

======================================================================
📈 클래스별 성능
======================================================================
Class  Precision   Recall  F1-Score  Support  Accuracy
   c0   0.803279 0.750547  0.776018      457  0.750547
   c1   0.997590 0.888412  0.939841      466  0.888412
   c2   0.997653 0.988372  0.992991      430  0.988372
   c3   1.000000 0.990544  0.995249      423  0.990544
   c4   0.918410 0.995465  0.955386      441  0.995465
   c5   0.952381 0.975610  0.963855      410  0.975610
   c6   0.878505 0.921569  0.899522      408  0.921569
   c7   0.896552 0.987342  0.939759      316  0.987342
   c8   0.804878 0.692308  0.744361      286  0.692308
   c9   0.709273 0.766938  0.736979      369  0.766938
No description has been provided for this image
In [30]:
# ROC Curve 분석
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize

# One-hot encoding
y_true_bin = label_binarize(y_true, classes=range(10))

# 각 클래스별 ROC Curve
plt.figure(figsize=(14, 10))

for i in range(10):
    fpr, tpr, _ = roc_curve(y_true_bin[:, i], y_probs[:, i])
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=2, label=f'{class_names[i]} (AUC = {roc_auc:.3f})')

plt.plot([0, 1], [0, 1], 'k--', lw=2, label='Random Classifier')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate', fontsize=12)
plt.ylabel('True Positive Rate', fontsize=12)
plt.title('ROC Curves (Multi-class)', fontsize=16, fontweight='bold')
plt.legend(loc='lower right', fontsize=10)
plt.grid(alpha=0.3)
plt.tight_layout()
plt.savefig(f'plots/resnet_v{version}/roc_curves.png', dpi=300)
plt.show()
No description has been provided for this image
In [ ]:
 
In [32]:
# Grad-CAM 구현
import torch.nn.functional as F
from torchvision.transforms import ToPILImage

class GradCAM:
    """Grad-CAM (ResNet 호환)"""
    def __init__(self, model, target_module=None):
        self.model = model
        self.target_module = target_module
        self.gradients = None
        self.activations = None

        # target_module이 None이면 모델에서 마지막 Conv2d를 찾아 사용
        if self.target_module is None:
            self.target_module, _ = get_last_conv_layer_resnet(self.model)
            if self.target_module is None:
                raise RuntimeError("마지막 Conv 레이어를 찾을 수 없습니다.")

        # 훅 등록 (forward / backward)
        self.target_module.register_forward_hook(self._save_activation)
        # 최신 PyTorch에서는 register_full_backward_hook 권장, 없으면 register_backward_hook 사용
        if hasattr(self.target_module, "register_full_backward_hook"):
            self.target_module.register_full_backward_hook(self._save_gradient)
        else:
            # 구버전 호환
            self.target_module.register_backward_hook(self._save_gradient)

    def _save_activation(self, module, input, output):
        self.activations = output.detach()

    def _save_gradient(self, module, grad_input, grad_output):
        # grad_output는 tuple
        self.gradients = grad_output[0].detach()

    def generate_cam(self, input_tensor, target_class=None):
        """
        input_tensor: [1, C, H, W] (이미 정규화된 텐서)
        target_class: int or None (None이면 argmax 사용)
        returns: cam (H, W) float numpy [0..1], logits tensor
        """
        self.model.eval()
        self.gradients = None
        self.activations = None

        logits = self.model(input_tensor)  # forward
        if target_class is None:
            target_class = int(logits.argmax(dim=1)[0])

        # backward on the score of target_class
        self.model.zero_grad()
        score = logits[0, target_class]
        score.backward(retain_graph=True)

        if self.gradients is None or self.activations is None:
            raise RuntimeError("Gradients or activations not recorded. Check hooks.")

        # gradients: [1, C, H, W] -> use [C, H, W]
        gradients = self.gradients[0].cpu()
        activations = self.activations[0].cpu()

        # global average pooling of gradients -> weights [C]
        weights = gradients.mean(dim=(1, 2))  # [C]

        # weighted sum of activations
        cam = (weights[:, None, None] * activations).sum(dim=0)  # [H, W]
        cam = F.relu(cam)
        cam = cam - cam.min()
        if cam.max() > 0:
            cam = cam / cam.max()
        cam_np = cam.numpy().astype(np.float32)

        return cam_np, logits

def get_last_conv_layer_resnet(model):
    """
    ResNet 계열 모델에서 마지막 nn.Conv2d 모듈을 찾아 반환
    반환: (module, name) 또는 (None, None)
    """
    for name, module in reversed(list(model.named_modules())):
        if isinstance(module, torch.nn.Conv2d):
            return module, name
    return None, None

def visualize_gradcam(model, image, true_label, pred_label, device):
    """Grad-CAM 시각화 (ResNet)"""
    # target layer 찾기/설정
    target_module, target_name = get_last_conv_layer_resnet(model)
    if target_module is None:
        print("⚠️ 마지막 Conv 레이어를 찾을 수 없습니다.")
        return

    gradcam = GradCAM(model, target_module)

    # 이미지는 [C, H, W] (정규화된 텐서)
    input_tensor = image.unsqueeze(0).to(device)
    cam, output = gradcam.generate_cam(input_tensor, target_class=pred_label)

    # 원본 이미지 복원 (정규화 역변환)
    img_np = image.cpu().numpy().transpose(1, 2, 0)
    img_np = img_np * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])
    img_np = np.clip(img_np, 0, 1)

    # cam은 [H, W], float [0..1] -> uint8로 변환 후 PIL 리사이즈
    H, W = img_np.shape[:2]
    cam_uint8 = np.uint8(255 * cam)
    cam_pil = Image.fromarray(cam_uint8).resize((W, H), Image.BILINEAR)
    cam_resized = np.array(cam_pil) / 255.0

    # 시각화
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))

    axes[0].imshow(img_np)
    axes[0].set_title(f'Original\nTrue: c{true_label}', fontsize=12)
    axes[0].axis('off')

    axes[1].imshow(img_np)
    axes[1].imshow(cam_resized, cmap='jet', alpha=0.5)
    axes[1].set_title(f'Grad-CAM\nPred: c{pred_label}', fontsize=12)
    axes[1].axis('off')

    axes[2].imshow(cam_resized, cmap='jet')
    axes[2].set_title('Heatmap', fontsize=12)
    axes[2].axis('off')

    plt.tight_layout()
    return fig

# 샘플 이미지에 Grad-CAM 적용
print("\n" + "="*70)
print("🔥 Grad-CAM 시각화")
print("="*70)

model = timm.create_model(
        'resnet26d',
        pretrained=True,
        num_classes=num_classes
    ).to(device)
checkpoint = torch.load(best_model_path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model = model.to(device)
model.eval()

# 검증 데이터에서 샘플 추출 (정답/오답 각각)
correct_samples = []
incorrect_samples = []

for i, (images, labels) in enumerate(val_loader):
    if len(correct_samples) >= 3 and len(incorrect_samples) >= 3:
        break
    
    images = images.to(device)
    labels = labels.to(device)
    
    with torch.no_grad():
        outputs = model(images)
        _, preds = torch.max(outputs, 1)
    
    for j in range(len(images)):
        if preds[j] == labels[j] and len(correct_samples) < 3:
            correct_samples.append((images[j], labels[j].item(), preds[j].item()))
        elif preds[j] != labels[j] and len(incorrect_samples) < 3:
            incorrect_samples.append((images[j], labels[j].item(), preds[j].item()))

# 정답 샘플 시각화
print("\n✅ 정답 예측 샘플")
for idx, (img, true_label, pred_label) in enumerate(correct_samples):
    fig = visualize_gradcam(model, img, true_label, pred_label, device)
    plt.savefig(f'plots/resnet_v{version}/gradcam_correct_{idx}.png', dpi=300, bbox_inches='tight')
    plt.show()

# 오답 샘플 시각화
print("\n❌ 오답 예측 샘플")
for idx, (img, true_label, pred_label) in enumerate(incorrect_samples):
    fig = visualize_gradcam(model, img, true_label, pred_label, device)
    plt.savefig(f'plots/resnet_v{version}/gradcam_incorrect_{idx}.png', dpi=300, bbox_inches='tight')
    plt.show()
======================================================================
🔥 Grad-CAM 시각화
======================================================================

✅ 정답 예측 샘플
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
❌ 오답 예측 샘플
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [35]:
# Feature Map 추출 및 시각화
class FeatureExtractor:
    """중간 레이어의 Feature Map 추출"""
    
    def __init__(self, model, layer_names):
        self.model = model
        self.layer_names = layer_names
        self.features = {}
        
        # Hook 등록
        for name, layer in model.named_modules():
            if name in layer_names:
                layer.register_forward_hook(self.save_feature(name))
    
    def save_feature(self, name):
        def hook(module, input, output):
            self.features[name] = output.detach()
        return hook
    
    def extract(self, x):
        self.features = {}
        _ = self.model(x)
        return self.features

def visualize_feature_maps(features, layer_name, max_channels=16):
    """Feature Map 시각화"""
    feature = features[layer_name][0]  # 첫 번째 배치
    num_channels = min(feature.shape[0], max_channels)
    
    # Grid 크기 계산
    grid_size = int(np.ceil(np.sqrt(num_channels)))
    
    fig, axes = plt.subplots(grid_size, grid_size, figsize=(15, 15))
    axes = axes.flatten()
    
    for i in range(num_channels):
        fmap = feature[i].cpu().numpy()
        axes[i].imshow(fmap, cmap='viridis')
        axes[i].set_title(f'Ch {i}', fontsize=8)
        axes[i].axis('off')
    
    # 빈 subplot 숨기기
    for i in range(num_channels, len(axes)):
        axes[i].axis('off')
    
    plt.suptitle(f'Feature Maps: {layer_name}', fontsize=16, fontweight='bold')
    plt.tight_layout()
    return fig

# Inception V4의 주요 레이어 이름 확인
print("\n" + "="*70)
print("🔍 모델 구조 탐색")
print("="*70)

for name, module in model.named_modules():
    if isinstance(module, nn.Conv2d):
        print(f"Conv Layer: {name}")

# 주요 레이어 선택 (예시)
target_layers = [
    'conv1.6',  # 첫 번째 Conv
    'layer1.0.conv3',  # 중간 블록
    'layer2.0.conv3',  # 후반 블록
    'layer3.0.conv3',
    'layer4.0.conv3',
    'layer4.1.conv3'

]

# 샘플 이미지로 Feature Map 추출
sample_image, sample_label = next(iter(val_loader))
sample_image = sample_image[0:1].to(device)  # 첫 번째 이미지

extractor = FeatureExtractor(model, target_layers)
features = extractor.extract(sample_image)

# 각 레이어 시각화
for layer_name in target_layers:
    if layer_name in features:
        fig = visualize_feature_maps(features, layer_name, max_channels=16)
        plt.savefig(f'plots/resnet_v{version}/feature_map_{layer_name.replace(".", "_")}.png', dpi=300)
        plt.show()
======================================================================
🔍 모델 구조 탐색
======================================================================
Conv Layer: conv1.0
Conv Layer: conv1.3
Conv Layer: conv1.6
Conv Layer: layer1.0.conv1
Conv Layer: layer1.0.conv2
Conv Layer: layer1.0.conv3
Conv Layer: layer1.0.downsample.1
Conv Layer: layer1.1.conv1
Conv Layer: layer1.1.conv2
Conv Layer: layer1.1.conv3
Conv Layer: layer2.0.conv1
Conv Layer: layer2.0.conv2
Conv Layer: layer2.0.conv3
Conv Layer: layer2.0.downsample.1
Conv Layer: layer2.1.conv1
Conv Layer: layer2.1.conv2
Conv Layer: layer2.1.conv3
Conv Layer: layer3.0.conv1
Conv Layer: layer3.0.conv2
Conv Layer: layer3.0.conv3
Conv Layer: layer3.0.downsample.1
Conv Layer: layer3.1.conv1
Conv Layer: layer3.1.conv2
Conv Layer: layer3.1.conv3
Conv Layer: layer4.0.conv1
Conv Layer: layer4.0.conv2
Conv Layer: layer4.0.conv3
Conv Layer: layer4.0.downsample.1
Conv Layer: layer4.1.conv1
Conv Layer: layer4.1.conv2
Conv Layer: layer4.1.conv3
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
# t-SNE/UMAP으로 임베딩 시각화
from sklearn.manifold import TSNE
# pip install umap-learn
# from umap import UMAP

def extract_embeddings(model, dataloader, device):
    """마지막 FC 레이어 이전의 임베딩 추출"""
    model.eval()
    embeddings = []
    labels = []
    
    # FC 레이어 이전 출력 추출을 위한 Hook
    features = []
    def hook(module, input, output):
        features.append(input[0].detach())
    
    # Inception V4의 마지막 FC 레이어에 Hook
    if hasattr(model, 'last_linear'):
        handle = model.last_linear.register_forward_hook(hook)
    elif hasattr(model, 'fc'):
        handle = model.fc.register_forward_hook(hook)
    
    with torch.no_grad():
        for images, lbls in tqdm(dataloader, desc='임베딩 추출'):
            images = images.to(device)
            _ = model(images)
            
            embeddings.append(features[-1].cpu().numpy())
            labels.extend(lbls.numpy())
            features.clear()
    
    handle.remove()
    
    embeddings = np.vstack(embeddings)
    labels = np.array(labels)
    
    return embeddings, labels

# 임베딩 추출
print("\n" + "="*70)
print("🧬 임베딩 추출 중...")
print("="*70)

embeddings, labels = extract_embeddings(model, val_loader, device)
print(f"임베딩 shape: {embeddings.shape}")

# t-SNE 시각화
print("\n📊 t-SNE 계산 중...")
tsne = TSNE(n_components=2, random_state=42, perplexity=30, max_iter=1000)
embeddings_2d = tsne.fit_transform(embeddings)

plt.figure(figsize=(14, 10))
scatter = plt.scatter(
    embeddings_2d[:, 0], 
    embeddings_2d[:, 1], 
    c=labels, 
    cmap='tab10', 
    s=10, 
    alpha=0.6
)
plt.colorbar(scatter, label='Class', ticks=range(10))
plt.title('t-SNE Visualization of Learned Embeddings', fontsize=16, fontweight='bold')
plt.xlabel('t-SNE Dimension 1')
plt.ylabel('t-SNE Dimension 2')
plt.grid(alpha=0.3)
plt.tight_layout()
plt.savefig(f'plots/inception_v{version}/tsne_embeddings.png', dpi=300)
plt.show()

# 클래스별로 색상 구분하여 Legend 추가
plt.figure(figsize=(16, 12))
for class_idx in range(10):
    mask = labels == class_idx
    plt.scatter(
        embeddings_2d[mask, 0],
        embeddings_2d[mask, 1],
        label=f'c{class_idx}',
        s=20,
        alpha=0.7
    )
plt.title('t-SNE Visualization by Class', fontsize=16, fontweight='bold')
plt.xlabel('t-SNE Dimension 1')
plt.ylabel('t-SNE Dimension 2')
plt.legend(loc='best', fontsize=10)
plt.grid(alpha=0.3)
plt.tight_layout()
plt.savefig(f'plots/inception_v{version}/tsne_embeddings_by_class.png', dpi=300)
plt.show()
======================================================================
🧬 임베딩 추출 중...
======================================================================
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[20], line 44
     41 print("🧬 임베딩 추출 중...")
     42 print("="*70)
---> 44 embeddings, labels = extract_embeddings(model, val_loader, device)
     45 print(f"임베딩 shape: {embeddings.shape}")
     47 # t-SNE 시각화

NameError: name 'model' is not defined
In [ ]:
 
In [ ]: